# 导入http工具包
import requests
# 导入靓汤，操作html
from bs4 import BeautifulSoup
# 导入excel 写入工具包
import xlwt
import pyautogui
import time
import json
from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.support import expected_conditions as EC
import selenium.webdriver.support.ui as ui
from selenium.webdriver.chrome.options import Options
import time
import random
import _thread
import sys
import logging

def scroll(count, browser):
    """Scroll the page downwards in fixed increments.

    Performs ``count`` steps. Step ``i`` (1-based) sleeps 0.2 seconds and then
    jumps to the absolute vertical offset ``i * 400`` through
    ``window.scrollTo``.

    Args:
        count: Number of scroll steps to perform.
        browser: Object exposing ``execute_script(script)``; the caller in
            this file passes a Selenium WebDriver.
    """
    for step in range(1, count + 1):
        # Pause before every jump; presumably this gives lazily loaded
        # content time to appear before scrolling further.
        time.sleep(0.2)
        offset = step * 400
        browser.execute_script("window.scrollTo(0," + str(offset) + ")")
def create_browser():
    """Create the Chrome WebDriver used for scraping.

    Chrome is started headless with a 1920x1080 window, extensions, GPU and
    the software rasterizer disabled, sandboxing off, certificate errors
    ignored, insecure content allowed and image loading switched off. The
    window is maximized before the driver is returned.

    Returns:
        The ``webdriver.Chrome`` instance.
    """
    launch_flags = (
        "--window-size=1920,1080",
        "--disable-extensions",
        "--headless",
        "--disable-gpu",
        "--disable-software-rasterizer",
        "--no-sandbox",
        "--ignore-certificate-errors",
        "--allow-running-insecure-content",
        "blink-settings=imagesEnabled=false",
    )
    chrome_options = Options()
    for flag in launch_flags:
        chrome_options.add_argument(flag)

    driver = webdriver.Chrome(options=chrome_options)
    # NOTE: according to the original author, AliExpress blocks browsers whose
    # window.navigator.webdriver is set, so the slider check cannot be passed
    # unless it is overridden to undefined. The workaround below is currently
    # disabled (see https://blog.csdn.net/weixin_43881394/article/details/108467118):
    # driver.execute_cdp_cmd("Page.addScriptToEvaluateOnNewDocument", {
    #    "source": """Object.defineProperty(navigator, 'webdriver', {get: () => undefined})""",
    # })
    driver.maximize_window()
    return driver

def get_data(browser, globalConfig):
    """Read the product cards on the loaded page and keep those passing the rules.

    Every element with class ``_3t7zg`` is treated as one product card. For
    each card the function records its link (query string removed), its title
    (class ``_18_85``), whether the card text contains ``"New"``, the store
    link and name (class ``ox0KZ``) and the first space-separated token of
    the sales text (class ``_1kNf9``).

    A card is kept when all of the following hold:
      * if ``rules.isNew`` is truthy, the card is flagged as new;
      * its sold count is greater than ``rules.soldGt``;
      * its title does not contain ``rules.title.notIn``.

    Elements are located with ``find_element(s)(By.CLASS_NAME, ...)`` because
    the ``find_element(s)_by_class_name`` helpers were removed from Selenium.

    Args:
        browser: Selenium WebDriver showing the product listing page.
        globalConfig: Configuration dict; reads ``rules.title.notIn``
            (assumed to be a single string -- TODO confirm against
            config.json), ``rules.isNew`` and ``rules.soldGt``.

    Returns:
        A list of dicts with the keys ``href``, ``title``, ``isNew``,
        ``storeHref``, ``storeName`` and ``sold`` (``sold`` stays a string).

    Raises:
        ValueError: If the sold token of a card that reaches the sales check
            is not an integer literal.
    """
    rules = globalConfig['rules']
    cards = browser.find_elements(By.CLASS_NAME, '_3t7zg')
    print("原始数量："+str(len(cards)))

    rows = []
    for card in cards:
        row = {}
        # Product link without its query string.
        row['href'] = card.get_attribute("href").split("?")[0]
        # Title, plus the check against the configured unwanted substring.
        title_text = card.find_element(By.CLASS_NAME, "_18_85").text
        row['title'] = title_text
        title_blocked = rules['title']['notIn'] in title_text
        # The card counts as new when "New" occurs anywhere in its text.
        row['isNew'] = "New" in card.text
        # Store link and name.
        store = card.find_element(By.CLASS_NAME, "ox0KZ")
        row['storeHref'] = store.get_attribute("href")
        row['storeName'] = store.text
        # Sales: first token of the sales text.
        sold_text = card.find_element(By.CLASS_NAME, "_1kNf9").text
        row['sold'] = sold_text.split(" ")[0]

        # When only new products are wanted, drop the others before the
        # sales figure is parsed at all.
        if rules['isNew'] and not row['isNew']:
            continue
        if int(row['sold']) > rules['soldGt'] and not title_blocked:
            rows.append(row)
    return rows

def save_to_excel(rows):
    """Write the selected products to the workbook ``mmt.xls``.

    The sheet has a centered header row followed by one line per product:
    title, product link, store (name concatenated with a HYPERLINK to the
    store page), sold count and score.

    Args:
        rows: Iterable of product dicts with the keys ``title``, ``href``,
            ``storeName``, ``storeHref`` and ``sold``. ``score`` is optional
            because ``get_data`` does not collect it; a missing score is
            written as an empty cell instead of raising ``KeyError``.
    """
    # Cell style: centered horizontally and vertically.
    centered = xlwt.XFStyle()
    alignment = xlwt.Alignment()
    alignment.horz = 0x02  # horizontal center
    alignment.vert = 0x01  # vertical center
    centered.alignment = alignment

    wb = xlwt.Workbook()
    ws = wb.add_sheet("选品(新品)")

    heads = ["商品名称", "商品地址", "店铺", "销售量", "评分"]
    for col_index, head in enumerate(heads):
        ws.write(0, col_index, head, centered)

    # Widen the three text-heavy columns.
    ws.col(0).width = 256 * 100
    ws.col(1).width = 256 * 100
    ws.col(2).width = 256 * 50

    # Data starts on the row below the header.
    for row_index, row in enumerate(rows, start=1):
        # Column 0: product title.
        ws.write(row_index, 0, row['title'])
        # Column 1: product link.
        ws.write(row_index, 1, row['href'])
        # Column 2: store. Double quotes are doubled so that a quote in the
        # name or URL cannot terminate the formula's string literal early.
        store_name = (row['storeName'] or '').replace('"', '""')
        store_href = (row['storeHref'] or '').replace('"', '""')
        ws.write(row_index, 2, xlwt.Formula('"' + store_name + ' " & HYPERLINK("' + store_href + '")'))
        # Column 3: sold count.
        ws.write(row_index, 3, row['sold'], centered)
        # Column 4: score (not always collected).
        ws.write(row_index, 4, row.get('score', ''), centered)

    wbName = 'mmt.xls'
    wb.save(wbName)
    print('成功写入->', wbName)

def re_tab(browser):
    """Close the current tab and move focus back to the first tab.

    Uses ``browser.switch_to.window`` (the same call ``over_condition``
    uses) instead of the ``switch_to_window`` method, which is no longer
    available in current Selenium releases.

    Args:
        browser: Selenium WebDriver whose current window should be closed.
    """
    browser.close()
    # The window list is read after closing, so index 0 is the first of the
    # windows that remain open.
    remaining = browser.window_handles
    browser.switch_to.window(remaining[0])

def send_message_wx(message):
    """Type ``message`` into the focused window and confirm it with Enter.

    The original Chinese comment describes this as sending a WeChat message;
    the code itself only simulates keyboard input, so whichever window has
    focus receives it.

    Args:
        message: Text to type. NOTE(review): ``typewrite`` simulates
            individual key presses, so text outside the keyboard's character
            set may not be typed -- confirm before sending non-ASCII text.
    """
    pyautogui.typewrite(message)
    # press() sends key-down followed by key-up. A lone keyDown() would leave
    # the Enter key held down after the message is sent.
    pyautogui.press('enter')

def over_condition(browser, row, key, globalConfig):
    """Check a product's sold/review ratio on its detail page.

    The detail page is opened in a separate tab named ``tab2``, the text of
    the element with class ``product-reviewer-reviews`` is read, and the tab
    is closed again through ``re_tab``. The first space-separated token of
    that text is taken as the review count.

    The product matches when ``int(sold / review)`` lies between
    ``rules.rsDiv.min`` and ``rules.rsDiv.max`` (both inclusive). A match is
    logged with ``logging.info`` and printed.

    Args:
        browser: Selenium WebDriver currently focused on the listing tab.
        row: Product dict providing ``href`` and ``sold``.
        key: Label placed in front of the match message.
        globalConfig: Configuration dict; reads ``rules.rsDiv.min``,
            ``rules.rsDiv.max`` and ``rules.soldGt``.

    Returns:
        True when the ratio is inside the configured range, otherwise False.
        A product with zero reviews has no defined ratio and yields False.

    Raises:
        ValueError: If the review or sold text does not start with an integer.
    """
    print(row['href']+" 判断中...")
    tab_name = 'tab2'
    browser.execute_script("window.open('about:blank','"+tab_name+"')")
    browser.switch_to.window(tab_name)
    try:
        browser.get(row['href'])
        reviewText = browser.find_element(By.CLASS_NAME, "product-reviewer-reviews").text
    finally:
        # Close the detail tab even when loading or the lookup fails, so the
        # next product starts from the listing tab again.
        re_tab(browser)
    print("reviewText="+reviewText)

    review = int(reviewText.split(" ")[0])
    sold = int(row['sold'])
    if review == 0:
        # No reviews yet: dividing would raise ZeroDivisionError, and an
        # unbounded ratio cannot satisfy the upper limit anyway.
        print(str(sold) + "/" + str(review) + "=N/A")
        return False

    ratio = sold / review
    print(str(sold) + "/" + str(review) + "=" + str(ratio))
    number = int(ratio)
    limits = globalConfig['rules']['rsDiv']
    if limits['min'] <= number <= limits['max']:
        message = "["+key+"]找到新品且销售量大于"+str(globalConfig['rules']['soldGt'])+"，销售量/评论数=["+str(ratio)+"]倍的产品："+row['href']
        logging.info(message)
        print(message)
        return True
    return False

def filter_detail(browser, rows, key, globalConfig):
    """Keep only the products whose detail page passes ``over_condition``.

    Products are checked one at a time, in the given order, with the shared
    browser.

    Args:
        browser: Selenium WebDriver handed to ``over_condition``.
        rows: Product dicts to check.
        key: Label forwarded to ``over_condition``.
        globalConfig: Configuration dict forwarded to ``over_condition``.

    Returns:
        A new list with the rows for which ``over_condition`` returned a
        truthy value.
    """
    return [
        candidate
        for candidate in rows
        if over_condition(browser, candidate, key, globalConfig)
    ]


def pre_config(key):
    """Configure logging and load the run configuration.

    Logging goes to ``<key>.log`` at INFO level, each record prefixed with
    its timestamp. The configuration is read from ``config.json`` in the
    current working directory.

    Args:
        key: Name used as the stem of the log file.

    Returns:
        The parsed content of ``config.json``.
    """
    logging.basicConfig(
        format='%(asctime)s %(message)s',
        level=logging.INFO,
        filename=key + '.log',
    )
    with open('config.json') as config_file:
        return json.load(config_file)


def main(key, page):
    """Scrape one listing page and report the products that pass all rules.

    Steps: load the configuration, build the listing URL for ``key`` and
    ``page``, open it in a fresh browser, scroll down so the page content
    gets loaded, filter the product cards with ``get_data`` and then filter
    the survivors by their detail pages with ``filter_detail``.

    The browser is shut down with ``quit()`` in a ``finally`` clause so the
    driver session is ended even when scraping raises.

    Args:
        key: Key into ``urlMap`` of the configuration; also used as the log
            file stem and as the label in messages.
        page: Page number appended to the listing URL (converted with
            ``str``).
    """
    globalConfig = pre_config(key)
    url = globalConfig['urlMap'][key]
    url = url + "?SortType=total_tranpro_desc&page="
    url = url + str(page) + "&groupsort=1"
    browser = create_browser()
    try:
        # Open the listing page.
        print("["+key+"]开始搜寻："+url)
        browser.get(url)

        # Scroll towards the bottom so the whole page of products
        # (60 according to the original author) gets loaded.
        scroll(count=12, browser=browser)
        print("滑动到底部")

        # First filter: listing-level rules (new flag, sales, title).
        rows = get_data(browser, globalConfig)
        print("满足条件商品数[新品标签且销售量大于"+str(globalConfig['rules']['soldGt'])+"]=" + str(len(rows)))

        # Second filter: sold/review ratio taken from each detail page.
        rows = filter_detail(browser, rows, key, globalConfig)
        rs_div = globalConfig['rules']['rsDiv']
        # Report the configured range as min-max (min used to be printed twice).
        print("满足条件商品数[且评分在"+str(rs_div['min'])+"-"+str(rs_div['max'])+"之间的]="+str(len(rows)))
        # Excel export is currently disabled:
        # save_to_excel(rows)
    finally:
        # quit() closes every window and ends the driver session.
        browser.quit()

if __name__ == "__main__":
    # Expected invocation: <script> <key> <page>, for example "sneakers 4".
    # Exit with a usage hint instead of an IndexError when arguments are missing.
    if len(sys.argv) < 3:
        sys.exit("usage: " + sys.argv[0] + " <key> <page>")
    main(sys.argv[1], sys.argv[2])







